library(dplyr)
library(slingshot)
library(patchwork)
library(slingshot)
library(ggrepel)
library(condiments)
library(tidyr)
library(RColorBrewer)
library(Seurat)
Subset Seurat object
all_fmi<-readRDS(file = "/Users/jose/Documents/ucl_postdoc/trophoblasts/FMI-all-20patients-20240827.rds")
tropho <- subset(all_fmi, CellTypeManual.l1 == "Trophoblast")
rm(all_fmi)
nojj<-unique(tropho$sample_id)[!grepl("F2044JJ",unique(tropho$sample_id))]
tropho <- subset(tropho, sample_id %in% nojj)
# subset to placenta
placenta_fmi<-subset(tropho, Tissue == "PVBP")
rm(tropho)
tropho_stb <- subset(placenta_fmi, CellTypeManual.l3 %in% c("STB","STB-precursor","CTB","CTB-proliferating"))
rm(placenta_fmi)
Generate UMAP
tropho_stb<- FindVariableFeatures(tropho_stb, selection.method = "vst")
tropho_stb <- ScaleData(tropho_stb)
## Centering and scaling data matrix
tropho_stb <- RunPCA(tropho_stb, verbose = FALSE)
# UMAP calculation
tropho_stb <- RunUMAP(tropho_stb, dims = 1:10)
## 17:12:21 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:12:21 Read 21465 rows and found 10 numeric columns
## 17:12:21 Using Annoy for neighbor search, n_neighbors = 30
## 17:12:21 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:12:23 Writing NN index file to temp file /var/folders/t2/8wy_rh2s7lv64wdz4bbh47km0000gp/T//RtmpdZwNq8/file17a9d1502fa53
## 17:12:23 Searching Annoy index using 1 thread, search_k = 3000
## 17:12:27 Annoy recall = 100%
## 17:12:28 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 17:12:29 Initializing from normalized Laplacian + noise (using RSpectra)
## 17:12:29 Commencing optimization for 200 epochs, with 881908 positive edges
## 17:12:35 Optimization finished
# Create the DimPlot with custom colors
p_celltype<-DimPlot(tropho_stb, reduction = "umap", group.by = "CellTypeManual.l3", label = TRUE, label.size = 3, repel = TRUE, split.by = "Condition")
p_celltype
#####
#convert to SCE
seu_placenta_initial <- as.SingleCellExperiment(tropho_stb, assay = "RNA")
Infer trajectory & pseudotime
seu_placenta <- slingshot(seu_placenta_initial, reducedDim = 'UMAP',
clusterLabels = colData(seu_placenta_initial)$CellTypeManual.l3,start.clus = "CTB-proliferating")
Convert to data.frame to be used in ggplot
seu_placenta1 <- bind_cols(
as.data.frame(reducedDims(seu_placenta)$UMAP),
as.data.frame(colData(seu_placenta)[1:40]),
## need to remove all the slingshot and and tradeseq info. tables and can't be conversted into datafram "fitgam" and "crv" fields
slingPseudotime(seu_placenta)
) %>%
sample_frac(1)
# Create the initial ggplot
p <- ggplot(seu_placenta1, aes(x = umap_1, y = umap_2)) +
geom_point(aes(fill = Condition), col = "grey70", shape = 21) +
theme_minimal() +
scale_fill_viridis_d()
# Add the geom_path layer to the ggplot
curves <- slingCurves(seu_placenta, as.df = TRUE)
p1 <- p +
geom_path(data = curves %>% arrange(Order),
aes(group = Lineage, col = as.character(Lineage)), size = 1.5, arrow = arrow(type = "closed", length = unit(0.15, "inches"))) +
#theme_minimal() +
labs(col = "Lineage")
p1
Imbalance scores in condiments
scores <- condiments::imbalance_score(
Object = seu_placenta1 %>% select(umap_1, umap_2) %>% as.matrix(),
conditions = seu_placenta1$Condition,
k = 20, smooth = 40)
seu_placenta1$scores <- scores$scores
seu_placenta1$scaled_scores <- scores$scaled_scores
The local scores are quite noisy so we can then use local smoothers
to smooth the scores of individual cells. The smoothness is dictated by
the smooth argument. Those smoothed scores were also
computed using the imbalance_score function.
# Create the initial ggplot
p <- ggplot(seu_placenta1, aes(x = umap_1, y = umap_2, col = scaled_scores)) +
geom_point() +
scale_color_viridis_c(option = "C")
# Add the geom_path layer to the ggplot
#curves <- slingCurves(seu_placenta, as.df = TRUE)
p1 <- p +
geom_path(data = curves %>% arrange(Order),
aes(group = Lineage), col = "green", size = 1.5,
arrow = arrow(type = "closed", length = unit(0.15, "inches"))) +
theme_classic() +
labs(col = "Imbalance") +
geom_point(data = curves %>%
group_by(Lineage) %>%
slice(round(seq(1, n(), length.out = 9))),
aes( size = 4),col = "green") +
theme_classic() +
labs(col = "Imbalance")
p1
#ppp4<-ggplot(seu_placenta1, aes(x = umap_1, y = umap_2, col = scaled_scores)) +
# geom_point() +
# scale_color_viridis_c(option = "C")
#ppp4
Imbalance scores in Early
seu_placenta1_early<-subset(seu_placenta1, GA_Category %in% "Early")
scores <- condiments::imbalance_score(
Object = seu_placenta1_early%>% select(umap_1, umap_2) %>% as.matrix(),
conditions = seu_placenta1_early$Condition,
k = 20, smooth = 40)
seu_placenta1_early$scores <- scores$scores
seu_placenta1_early$scaled_scores <- scores$scaled_scores
# Create the initial ggplot
p <- ggplot(seu_placenta1_early, aes(x = umap_1, y = umap_2, col = scaled_scores)) +
geom_point() +
scale_color_viridis_c(option = "C")
# Add the geom_path layer to the ggplot
curves <- slingCurves(seu_placenta, as.df = TRUE)
p1 <- p +
geom_path(data = curves %>% arrange(Order),
aes(group = Lineage), col = "green", size = 1.5,
arrow = arrow(type = "closed", length = unit(0.15, "inches"))) +
theme_classic() +
labs(col = "Imbalance") +
#scale_color_brewer(palette = "Set1")+
geom_point(data = curves %>%
group_by(Lineage) %>%
slice(round(seq(1, n(), length.out = 9))),
aes( size = 4),col = "green") +
theme_classic() +
labs(col = "Imbalance")
#scale_color_brewer(palette = "Set1")
p1
#ppp4<-ggplot(seu_placenta1, aes(x = umap_1, y = umap_2, col = scaled_scores)) +
# geom_point() +
# scale_color_viridis_c(option = "C")
#ppp4
Imbalance scores in Late
seu_placenta1_late<-subset(seu_placenta1, GA_Category %in% "Late")
scores <- condiments::imbalance_score(
Object = seu_placenta1_late%>% select(umap_1, umap_2) %>% as.matrix(),
conditions = seu_placenta1_late$Condition,
k = 20, smooth = 40)
seu_placenta1_late$scores <- scores$scores
seu_placenta1_late$scaled_scores <- scores$scaled_scores
# Create the initial ggplot
p <- ggplot(seu_placenta1_late, aes(x = umap_1, y = umap_2, col = scaled_scores)) +
geom_point() +
scale_color_viridis_c(option = "C")
# Add the geom_path layer to the ggplot
curves <- slingCurves(seu_placenta, as.df = TRUE)
p1 <- p +
geom_path(data = curves %>% arrange(Order),
aes(group = Lineage), col = "green", size = 1.5,
arrow = arrow(type = "closed", length = unit(0.15, "inches"))) +
theme_classic() +
labs(col = "Lineage") +
#scale_color_brewer(palette = "Set1")+
geom_point(data = curves %>%
group_by(Lineage) %>%
slice(round(seq(1, n(), length.out = 9))),
aes( size = 4),col = "green") +
theme_classic() +
labs(col = "Lineage")
#scale_color_brewer(palette = "Set1")
p1
#ppp4<-ggplot(seu_placenta1, aes(x = umap_1, y = umap_2, col = scaled_scores)) +
# geom_point() +
# scale_color_viridis_c(option = "C")
#ppp4
seu_placenta1_GA<-rbind(seu_placenta1_late, seu_placenta1_early)
# Create the initial ggplot
p <- ggplot(seu_placenta1_GA, aes(x = umap_1, y = umap_2, col = scaled_scores)) +
geom_point() +
scale_color_viridis_c(option = "C") + facet_wrap(~GA_Category)
# Add the geom_path layer to the ggplot
curves <- slingCurves(seu_placenta, as.df = TRUE)
p1 <- p +
geom_path(data = curves %>% arrange(Order),
aes(group = Lineage), col = "green", size = 1.5,
arrow = arrow(type = "closed", length = unit(0.15, "inches"))) +
theme_classic() +
labs(col = "Lineage") +
#scale_color_brewer(palette = "Set1")+
geom_point(data = curves %>%
group_by(Lineage) %>%
slice(round(seq(1, n(), length.out = 9))),
aes( size = 4),col = "green") +
theme_classic() +
labs(col = "Lineage")
#scale_color_brewer(palette = "Set1")
p1
#ppp4<-ggplot(seu_placenta1, aes(x = umap_1, y = umap_2, col = scaled_scores)) +
# geom_point() +
# scale_color_viridis_c(option = "C")
#ppp4
seu_placenta1_GA<-rbind(seu_placenta1_late, seu_placenta1_early)
seu_placenta1_GA_nocbt<-subset(seu_placenta1_GA, umap_1 < 5)
# Create the initial ggplot
p <- ggplot(seu_placenta1_GA_nocbt, aes(x = umap_1, y = umap_2, col = scaled_scores)) +
geom_point() +
scale_color_viridis_c(option = "C") #+ facet_wrap(~GA_Category)
# Add the geom_path layer to the ggplot
curves <- slingCurves(seu_placenta, as.df = TRUE)
curves2<-subset(curves, Order > 38)
p1 <- p +
geom_path(data = curves2 %>% arrange(Order),
aes(group = Lineage), col = "green", size = 1.5,
arrow = arrow(type = "closed", length = unit(0.15, "inches"))) +
theme_classic() +
labs(col = "Lineage") +
#scale_color_brewer(palette = "Set1")+
geom_point(data = curves2 %>%
group_by(Lineage) %>%
slice(round(seq(1, n(), length.out = 7))),
aes( size = 4),col = "green") +
theme_classic() +
labs(col = "Lineage")
#scale_color_brewer(palette = "Set1")
p1
Plot pseudotime per cell in UMAP
# Create the initial ggplot
p2 <- ggplot(seu_placenta1, aes(x = umap_1, y = umap_2)) +
geom_point(aes(fill = Lineage1), col = "grey70", shape = 21) +
#theme_minimal() +
scale_fill_viridis_c()
# Add the geom_path layer to the ggplot
p3 <- p2 +
geom_path(data = curves %>% arrange(Order),
aes(group = Lineage, col = as.character(Lineage)), size = 1.5, arrow = arrow(type = "closed", length = unit(0.15, "inches"))) +
#theme_minimal() +
labs(col = "Lineage") +
scale_color_brewer(palette = "Set1")+
geom_point(data = curves %>%
group_by(Lineage) %>%
slice(round(seq(1, n(), length.out = 9))),
aes(col = as.character(Lineage)), size = 4) +
theme_classic() +
labs(col = "Lineage") +
scale_color_brewer(palette = "Set1")
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
# Print the updated plot
print(p3)
Function to calculate differential abundance in pseudotime
DAbundance_sum_expression_in_pseudotime <- function(df_sample, lineages = "Lineage1", gest_age = NULL, conditions = c("Control", "PE"), pseudotime_intervals = 1, tropho,genestoplot,expr_factor = 100) {
# Subset data frame by lineages and gestational age if specified
if (!is.null(gest_age)) {
### make the script:
#lineages = "Lineage3"
#gest_age = "Early"
#pseudotime_intervals = 1
#conditions = c("Control", "PE")
dfl1 <- df_sample[df_sample$lineages == lineages & df_sample$GA_Category == gest_age, ]
} else {
dfl1 <- df_sample[df_sample$lineages == lineages, ]
}
# Define the range of pseudotime values
min_pt <- min(dfl1$pseudotime)
max_pt <- max(dfl1$pseudotime)
# Create a sequence of pseudotime values at intervals of specified units
pseudotime_seq <- seq(min_pt, max_pt, by = pseudotime_intervals)
# Initialize density_df outside the loop if not already done
# Initialize empty dataframe to store density values for control and PE samples
density_df <- data.frame(Pseudotime = pseudotime_seq,
n_samples_Control = rep(0, length(pseudotime_seq)),
n_samples_PE = rep(0, length(pseudotime_seq)),
Density_Control = rep(0, length(pseudotime_seq)),
Density_PE = rep(0, length(pseudotime_seq)),
Wilcox_Pvalue = rep(0, length(pseudotime_seq)),
Mean_Control = rep(0, length(pseudotime_seq)),
Mean_PE = rep(0, length(pseudotime_seq)),
expr_score = NA,
done = "no"
)
# Loop through pseudotime sequence
for (i in seq_along(pseudotime_seq)) {
# Subset data for each pseudotime interval
subset_data <- dfl1[dfl1$pseudotime >= pseudotime_seq[i] & dfl1$pseudotime < pseudotime_seq[i] + pseudotime_intervals, ]
# Check if there are any cells in this interval
if (nrow(subset_data) == 0 || length(intersect(rownames(subset_data), colnames(tropho))) == 0) {
message(paste("No cells found for interval", i, ". Skipping..."))
density_df$done[i] <- "yes"
density_df[i,]<-NA
next # Skip to the next iteration
}
# Subset the Seurat object
interval_seurat <- subset(tropho, cells = intersect(rownames(subset_data), colnames(tropho)))
# Check if the subset Seurat object has any cells
if (ncol(interval_seurat) <= 1) {
message(paste("No cells remained after subsetting for interval", i, ". Skipping..."))
next # Skip to the next iteration
}
#genes_of_interest <- list(c("LEP", "LTF", "DERL3"))
#interval_seurat <- AddModuleScore(interval_seurat,
# features = genes_of_interest,
# name = "GeneScore")
#density_df$expr_score[i] <- mean(interval_seurat$GeneScore1)
# Get the expression data for genes of interest
#genes_of_interest <- c("HTRA1","FSTL3","LTF","LEP","FLT1")
genes_of_interest <- genestoplot
gene_expr <- GetAssayData(interval_seurat, slot = "data")[genes_of_interest, ]
# Calculate the sum of expression of all the genes for each cell
if(length(genes_of_interest) == 1 ) {
sum_expr <- t(as.data.frame(gene_expr))} else {
sum_expr <- colSums(gene_expr)}
# Calculate the mean of the sum across all cells in this interval
density_df$expr_score[i] <- mean(sum_expr)
density_df$done[i] <- "yes"
#}
# ggplot(data = density_df, aes(x = pseudotime)) +
# geom_line(aes(y = 10*expr_score, color = 'coral'), size = 1) +
# labs(x = "Pseudotime", y = "Mean Hypoxia score", title = paste(lineages,"and",gest_age)) +
# theme_minimal() +
# theme(legend.position = "top") +
# guides(color = guide_legend(title = NULL))
### This is the DA script:
# Count the number of control and PE cells in the interval
control_count <- sum(subset_data$Condition == conditions[1])
PE_count <- sum(subset_data$Condition == conditions[2])
# Update the density dataframe
density_df$Density_Control[i] <- control_count
density_df$Density_PE[i] <- PE_count
# Count the number of replicates (libraries, samples) per condition
control_samples <- length(unique(subset_data[subset_data$Condition == conditions[1],"sample_id"]))
PE_samples <- length(unique(subset_data[subset_data$Condition == conditions[2],"sample_id"]))
# Update total cells in density dataframe
density_df$n_samples_Control[i] <- control_samples
density_df$n_samples_PE[i] <- PE_samples
# Check if both conditions have enough replications for the test
if (control_samples >= 2 & PE_samples >= 2) {
# Contingency table for Wilcoxon rank-sum test
contingency_table <- table(subset_data$Condition, subset_data$sample_id)
# remove placentas or sn
#contingency_table <- contingency_table[, !grepl("-AB|-sn", colnames(contingency_table))]
# Extract counts for Control and PE conditions
control_v <- as.vector(contingency_table[conditions[1], ])
pe_v <- as.vector(contingency_table[conditions[2], ])
# Remove zeros from the vectors
control_v <- control_v[control_v != 0]
pe_v <- pe_v[pe_v != 0]
# Perform Wilcoxon rank-sum test
test_result <- wilcox.test(pe_v, control_v, exact = F)
# Update Wilcox_Pvalue column in density dataframe
density_df$Wilcox_Pvalue[i] <- test_result$p.value
# Calculate means for PE and Control
Mean_pe <- mean(pe_v)
Mean_control <- mean(control_v)
# Update Sum_Ranks columns in density dataframe
density_df$Mean_Control[i] <- Mean_control
density_df$Mean_PE[i] <- Mean_pe
} else {
# If there are not enough observations for the test, set p-value to 1
density_df$Wilcox_Pvalue[i] <- 1
# Set Sum_Ranks to NA if there are not enough observations
density_df$Mean_PE[i] <- NA
density_df$Mean_Control[i] <- NA
}
}
# Apply Benjamini-Hochberg correction for multiple testing
density_df$Adjusted_Pvalue <- p.adjust(density_df$Wilcox_Pvalue, method = "BH")
# Determine which condition is more abundant in each interval
density_df$More_Abundant <- ifelse(density_df$Density_Control > density_df$Density_PE, conditions[1], conditions[2])
density_df$Difference <- density_df$Density_Control - density_df$Density_PE
# Display significant DA intervals
sig_DA <- density_df[density_df$Adjusted_Pvalue <= 0.2, ]
print(sig_DA)
print(density_df)
# Combine the two variable names with an underscore
if(nrow(sig_DA) >= 1){
new_variable_name <- paste(lineages,gest_age,sep = "_")
sig_DA$test<-new_variable_name
#DA_intervals_df<-rbind(DA_intervals_df,sig_DA)
# Rename the variable
#assign(new_variable_name, sig_DA)
write.csv(sig_DA,paste("~/Documents/ucl_postdoc/cell_trajectories/",lineages,"_",gest_age,".csv", sep = ""))}
# Plot
# Perform correlation tests
cor_expr <- cor.test(density_df$Mean_PE / density_df$Mean_Control, density_df$expr_score)
# Extract correlation and p-value
cor_expr_r <- round(cor_expr$estimate, 2) # Correlation coefficient for IL10
cor_expr_p <- signif(cor_expr$p.value, 2)
# Calculate scaling factor between variables
y1_range <- range(density_df$Mean_PE - density_df$Mean_Control, na.rm = TRUE)
y2_range <- range(density_df$expr_score, na.rm = TRUE)
scaling_factor <- diff(y2_range)/diff(y1_range)
ggplot(data = density_df, aes(x = Pseudotime)) +
geom_vline(xintercept = sig_DA$Pseudotime,
linetype = "solid", color = "lightblue", size = 5) +
geom_hline(yintercept = 0, linetype = "dashed", color = "grey") +
geom_line(aes(y = Mean_PE - Mean_Control,
color = 'Cells PE - Control'), size = 1) +
geom_line(aes(y = (expr_factor * expr_score)/scaling_factor,
color = 'Expr'), size = 1, linetype = "dotted") +
scale_y_continuous(
name = 'Cells PE - Control',
sec.axis = sec_axis(~ . * scaling_factor, name = "Expression"),
expand = expansion(mult = c(0.1, 0.1)) # Add 10% padding
) +
scale_color_manual(values = c(
'Cells PE - Control' = 'blue',
'Expr' = 'purple'
)) +
labs(
x = "Pseudotime",
title = paste("r =", cor_expr_r, ", p =", cor_expr_p)
) +
theme_minimal() +
theme(legend.position = "top",
plot.title = element_text(size = 10)) +
guides(color = guide_legend(title = NULL))
}
Trigger differential abundance analysis
genestoplot <- c("HTRA4","HTRA1","FSTL3","TMEM45A","EGLN3")
seu_placenta1$pseudotime<-seu_placenta1$Lineage1
seu_placenta1$lineages<-"Lineage1"
p1a<-DAbundance_sum_expression_in_pseudotime(seu_placenta1, lineages = "Lineage1", gest_age = NULL,conditions = c("Control", "PE"), pseudotime_intervals = 3,tropho_stb,genestoplot,expr_factor = 1)
## No cells found for interval 3 . Skipping...
## Pseudotime n_samples_Control n_samples_PE Density_Control Density_PE Wilcox_Pvalue Mean_Control Mean_PE expr_score done Adjusted_Pvalue More_Abundant
## NA NA NA NA NA NA NA NA NA NA <NA> NA <NA>
## 6 15 11 13 514 2149 0.0204785872 46.727273 165.30769 2.199327 yes 0.088740544 PE
## 7 18 11 13 78 924 0.0001997948 7.090909 71.07692 4.186597 yes 0.002597333 PE
## 8 21 10 13 55 467 0.0012288529 5.500000 35.92308 4.583954 yes 0.007987544 PE
## Difference
## NA NA
## 6 -1635
## 7 -846
## 8 -412
## Pseudotime n_samples_Control n_samples_PE Density_Control Density_PE Wilcox_Pvalue Mean_Control Mean_PE expr_score done Adjusted_Pvalue More_Abundant
## 1 0 12 13 228 298 0.6828513737 19.000000 22.92308 0.0867085 yes 1.000000000 PE
## 2 3 12 13 185 618 0.3129806223 15.416667 47.53846 0.2688532 yes 0.993004304 PE
## 3 NA NA NA NA NA NA NA NA NA <NA> NA <NA>
## 4 9 0 2 0 2 1.0000000000 NA NA 2.2951013 yes 1.000000000 PE
## 5 12 12 13 900 2154 0.5137787520 75.000000 165.69231 0.9734493 yes 1.000000000 PE
## 6 15 11 13 514 2149 0.0204785872 46.727273 165.30769 2.1993268 yes 0.088740544 PE
## 7 18 11 13 78 924 0.0001997948 7.090909 71.07692 4.1865974 yes 0.002597333 PE
## 8 21 10 13 55 467 0.0012288529 5.500000 35.92308 4.5839539 yes 0.007987544 PE
## 9 24 12 13 1213 1441 1.0000000000 101.083333 110.84615 2.2257067 yes 1.000000000 PE
## 10 27 12 12 1086 940 0.7287511119 90.500000 78.33333 2.0126593 yes 1.000000000 Control
## 11 30 9 11 1046 740 0.3819247323 116.222222 67.27273 1.5477052 yes 0.993004304 Control
## 12 33 12 11 1488 804 0.8053047818 124.000000 73.09091 0.9222128 yes 1.000000000 Control
## 13 36 12 12 2182 1667 0.8624310005 181.833333 138.91667 0.7114181 yes 1.000000000 Control
## 14 39 11 8 183 103 0.7408525339 16.636364 12.87500 0.3641109 yes 1.000000000 Control
## Difference
## 1 -70
## 2 -433
## 3 NA
## 4 -2
## 5 -1254
## 6 -1635
## 7 -846
## 8 -412
## 9 -228
## 10 146
## 11 306
## 12 684
## 13 515
## 14 80
p2a<-DAbundance_sum_expression_in_pseudotime(seu_placenta1, lineages = "Lineage1", conditions = c("Control", "PE"),gest_age = "Early" ,pseudotime_intervals = 3,tropho_stb,genestoplot,expr_factor = 1)
## No cells found for interval 3 . Skipping...
## Pseudotime n_samples_Control n_samples_PE Density_Control Density_PE Wilcox_Pvalue Mean_Control Mean_PE expr_score done Adjusted_Pvalue More_Abundant
## NA NA NA NA NA NA NA NA NA NA <NA> NA <NA>
## 7 18 4 7 11 513 0.01055554 2.75 73.28571 4.678713 yes 0.1372221 PE
## Difference
## NA NA
## 7 -502
## Pseudotime n_samples_Control n_samples_PE Density_Control Density_PE Wilcox_Pvalue Mean_Control Mean_PE expr_score done Adjusted_Pvalue More_Abundant
## 1 0 5 7 150 74 0.07353002 30.00000 10.571429 0.09639887 yes 0.3186301 Control
## 2 3 5 7 98 173 0.87099119 19.60000 24.714286 0.53818786 yes 0.9435738 PE
## 3 NA NA NA NA NA NA NA NA NA <NA> NA <NA>
## 4 9 0 2 0 2 1.00000000 NA NA 2.29510135 yes 1.0000000 PE
## 5 12 5 7 204 509 0.87099119 40.80000 72.714286 1.08535585 yes 0.9435738 PE
## 6 15 4 7 88 888 0.15637572 22.00000 126.857143 2.76522577 yes 0.5082211 PE
## 7 18 4 7 11 513 0.01055554 2.75000 73.285714 4.67871327 yes 0.1372221 PE
## 8 21 4 7 13 137 0.04621820 3.25000 19.571429 4.80753775 yes 0.3004183 PE
## 9 24 5 7 501 691 0.74490219 100.20000 98.714286 2.17395921 yes 0.9435738 PE
## 10 27 5 6 634 235 0.78225207 126.80000 39.166667 1.54996369 yes 0.9435738 Control
## 11 30 3 5 256 111 0.29090092 85.33333 22.200000 0.90644554 yes 0.6366227 Control
## 12 33 5 5 344 118 0.52580895 68.80000 23.600000 0.64428721 yes 0.8544395 Control
## 13 36 5 6 483 320 0.41024790 96.60000 53.333333 0.48103078 yes 0.7618890 Control
## 14 39 5 3 80 20 0.29382585 16.00000 6.666667 0.31782948 yes 0.6366227 Control
## Difference
## 1 76
## 2 -75
## 3 NA
## 4 -2
## 5 -305
## 6 -800
## 7 -502
## 8 -124
## 9 -190
## 10 399
## 11 145
## 12 226
## 13 163
## 14 60
p3a<-DAbundance_sum_expression_in_pseudotime(seu_placenta1, lineages = "Lineage1", conditions = c("Control", "PE"),gest_age = "Late", pseudotime_intervals = 3,tropho_stb,genestoplot,expr_factor = 1)
## No cells found for interval 3 . Skipping...
## No cells found for interval 4 . Skipping...
## Pseudotime n_samples_Control n_samples_PE Density_Control Density_PE Wilcox_Pvalue Mean_Control Mean_PE expr_score done Adjusted_Pvalue More_Abundant
## NA NA NA NA NA NA NA NA NA NA <NA> NA <NA>
## NA.1 NA NA NA NA NA NA NA NA NA <NA> NA <NA>
## 6 15.04979 7 6 421 1257 0.038318763 60.142857 209.50000 1.895712 yes 0.15327505 PE
## 7 18.04979 7 6 67 400 0.011242652 9.571429 66.66667 3.680565 yes 0.06745591 PE
## 8 21.04979 6 6 38 325 0.004998125 6.333333 54.16667 4.526640 yes 0.05997750 PE
## Difference
## NA NA
## NA.1 NA
## 6 -836
## 7 -333
## 8 -287
## Pseudotime n_samples_Control n_samples_PE Density_Control Density_PE Wilcox_Pvalue Mean_Control Mean_PE expr_score done Adjusted_Pvalue More_Abundant
## 1 0.0497923 7 6 79 230 0.174143130 11.285714 38.33333 0.08234964 yes 0.41794351 PE
## 2 3.0497923 7 6 86 439 0.429488462 12.285714 73.16667 0.13068479 yes 0.73626593 PE
## 3 NA NA NA NA NA NA NA NA NA <NA> NA <NA>
## 4 NA NA NA NA NA NA NA NA NA <NA> NA <NA>
## 5 12.0497923 7 6 705 1665 0.224638639 100.714286 277.50000 0.94051534 yes 0.44927728 PE
## 6 15.0497923 7 6 421 1257 0.038318763 60.142857 209.50000 1.89571236 yes 0.15327505 PE
## 7 18.0497923 7 6 67 400 0.011242652 9.571429 66.66667 3.68056458 yes 0.06745591 PE
## 8 21.0497923 6 6 38 325 0.004998125 6.333333 54.16667 4.52663981 yes 0.05997750 PE
## 9 24.0497923 7 6 718 758 0.617075077 102.571429 126.33333 2.26724496 yes 0.86474027 PE
## 10 27.0497923 7 6 457 707 0.133614403 65.285714 117.83333 2.36135889 yes 0.40084321 PE
## 11 30.0497923 6 6 794 626 0.936186293 132.333333 104.33333 1.69935966 yes 0.93618629 Control
## 12 33.0497923 7 6 1166 706 0.720616893 166.571429 117.66667 0.98173188 yes 0.86474027 Control
## 13 36.0497923 7 6 1678 1331 0.830324258 239.714286 221.83333 0.76887975 yes 0.90580828 Control
## 14 39.0497923 6 5 87 72 0.713753631 14.500000 14.40000 0.36973994 yes 0.86474027 Control
## Difference
## 1 -151
## 2 -353
## 3 NA
## 4 NA
## 5 -960
## 6 -836
## 7 -333
## 8 -287
## 9 -40
## 10 -250
## 11 168
## 12 460
## 13 347
## 14 15
p1a+p2a+p3a
## Warning: Removed 1 row containing missing values or values outside the scale range (`geom_vline()`).
## Warning: Removed 1 row containing missing values or values outside the scale range (`geom_line()`).
## Removed 1 row containing missing values or values outside the scale range (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range (`geom_vline()`).
## Warning: Removed 1 row containing missing values or values outside the scale range (`geom_line()`).
## Removed 1 row containing missing values or values outside the scale range (`geom_line()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_vline()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_line()`).
## Removed 2 rows containing missing values or values outside the scale range (`geom_line()`).
p2a+p3a
## Warning: Removed 1 row containing missing values or values outside the scale range (`geom_vline()`).
## Warning: Removed 1 row containing missing values or values outside the scale range (`geom_line()`).
## Removed 1 row containing missing values or values outside the scale range (`geom_line()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_vline()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_line()`).
## Removed 2 rows containing missing values or values outside the scale range (`geom_line()`).
Function to calculate differential abundance in proportion to total
trophoblasts per donor
Proportion_DAbundance_sum_expression_in_pseudotime <- function(df_sample, lineages = "Lineage1", gest_age = NULL, conditions = c("Control", "PE"), pseudotime_intervals = 1, tropho,genestoplot,expr_factor = 100) {
# Subset data frame by lineages and gestational age if specified
### make the script:
#df_sample<-seu_placenta1
#lineages = "Lineage1"
#gest_age = NULL
#conditions = c("Control", "PE")
#pseudotime_intervals = 3
#tropho<-tropho_stb
#expr_factor = 25
total_trophos_sample<-table(df_sample$sample_id)
total_trophos_sample_df <- as.data.frame(t(as.matrix(total_trophos_sample)))
if (!is.null(gest_age)) {
dfl1 <- df_sample[df_sample$lineages == lineages & df_sample$GA_Category == gest_age, ]
} else {
dfl1 <- df_sample[df_sample$lineages == lineages, ]
}
# Define the range of pseudotime values
min_pt <- min(dfl1$pseudotime)
max_pt <- max(dfl1$pseudotime)
# Create a sequence of pseudotime values at intervals of specified units
pseudotime_seq <- seq(min_pt, max_pt, by = pseudotime_intervals)
# Initialize density_df outside the loop if not already done
# Initialize empty dataframe to store density values for control and PE samples
density_df <- data.frame(Pseudotime = pseudotime_seq,
n_samples_Control = rep(0, length(pseudotime_seq)),
n_samples_PE = rep(0, length(pseudotime_seq)),
Density_Control = rep(0, length(pseudotime_seq)),
Density_PE = rep(0, length(pseudotime_seq)),
Wilcox_Pvalue = rep(0, length(pseudotime_seq)),
Mean_Control = rep(0, length(pseudotime_seq)),
Mean_PE = rep(0, length(pseudotime_seq)),
expr_score = NA,
done = "no"
)
# Loop through pseudotime sequence
for (i in seq_along(pseudotime_seq)) {
# Subset data for each pseudotime interval
subset_data <- dfl1[dfl1$pseudotime >= pseudotime_seq[i] & dfl1$pseudotime < pseudotime_seq[i] + pseudotime_intervals, ]
# Check if there are any cells in this interval
if (nrow(subset_data) == 0 || length(intersect(rownames(subset_data), colnames(tropho))) == 0) {
message(paste("No cells found for interval", i, ". Skipping..."))
density_df$done[i] <- "yes"
density_df[i,]<-NA
next # Skip to the next iteration
}
# Subset the Seurat object
interval_seurat <- subset(tropho, cells = intersect(rownames(subset_data), colnames(tropho)))
# Check if the subset Seurat object has any cells
if (ncol(interval_seurat) <= 1) {
message(paste("No cells remained after subsetting for interval", i, ". Skipping..."))
next # Skip to the next iteration
}
#genes_of_interest <- list(c("LEP", "LTF", "DERL3"))
#interval_seurat <- AddModuleScore(interval_seurat,
# features = genes_of_interest,
# name = "GeneScore")
#density_df$expr_score[i] <- mean(interval_seurat$GeneScore1)
# Get the expression data for genes of interest
#genes_of_interest <- c("HTRA1","FSTL3","LTF","LEP","FLT1")
genes_of_interest <- genestoplot
gene_expr <- GetAssayData(interval_seurat, slot = "data")[genes_of_interest, ]
# Calculate the sum of expression of all the genes for each cell
if(length(genes_of_interest) == 1 ) {
sum_expr <- t(as.data.frame(gene_expr))} else {
sum_expr <- colSums(gene_expr)}
# Calculate the mean of the sum across all cells in this interval
density_df$expr_score[i] <- mean(sum_expr)
density_df$done[i] <- "yes"
#}
# ggplot(data = density_df, aes(x = pseudotime)) +
# geom_line(aes(y = 10*expr_score, color = 'coral'), size = 1) +
# labs(x = "Pseudotime", y = "Mean Hypoxia score", title = paste(lineages,"and",gest_age)) +
# theme_minimal() +
# theme(legend.position = "top") +
# guides(color = guide_legend(title = NULL))
### This is the DA script:
# Count the number of control and PE cells in the interval
control_count <- sum(subset_data$Condition == conditions[1])
PE_count <- sum(subset_data$Condition == conditions[2])
# Update the density dataframe
density_df$Density_Control[i] <- control_count
density_df$Density_PE[i] <- PE_count
# Count the number of replicates (libraries, samples) per condition
control_samples <- length(unique(subset_data[subset_data$Condition == conditions[1],"sample_id"]))
PE_samples <- length(unique(subset_data[subset_data$Condition == conditions[2],"sample_id"]))
# Update total cells in density dataframe
density_df$n_samples_Control[i] <- control_samples
density_df$n_samples_PE[i] <- PE_samples
# Check if both conditions have enough replications for the test
if (control_samples >= 2 & PE_samples >= 2) {
# Contingency table for Wilcoxon rank-sum test
contingency_table <- table(subset_data$Condition, subset_data$sample_id)
# remove placentas or sn
#contingency_table <- contingency_table[, !grepl("-AB|-sn", colnames(contingency_table))]
contingency_df <- as.data.frame.matrix(contingency_table)
# Ensure only the common columns are kept
common_cols <- intersect(colnames(contingency_df), colnames(total_trophos_sample_df))
total_trophos_sample_filtered <- total_trophos_sample_df %>% select(all_of(common_cols))
# Merge by row binding
merged_table <- rbind(contingency_df, total_trophos_sample_filtered)
merged_table["C_prop",]<-merged_table[1,]/merged_table[3,]
merged_table["PE_prop",]<-merged_table[2,]/merged_table[3,]
# Extract counts for Control and PE conditions
control_v <- as.vector(unlist(merged_table["C_prop",]))
pe_v <- as.vector(unlist(merged_table["PE_prop",]))
# Remove zeros from the vectors
control_v <- control_v[control_v != 0]
pe_v <- pe_v[pe_v != 0]
# Perform Wilcoxon rank-sum test
test_result <- wilcox.test(pe_v, control_v, exact = F)
# Update Wilcox_Pvalue column in density dataframe
density_df$Wilcox_Pvalue[i] <- test_result$p.value
# Calculate means for PE and Control
Mean_pe <- mean(pe_v)
Mean_control <- mean(control_v)
# Update Sum_Ranks columns in density dataframe
density_df$Mean_Control[i] <- Mean_control
density_df$Mean_PE[i] <- Mean_pe
} else {
# If there are not enough observations for the test, set p-value to 1
density_df$Wilcox_Pvalue[i] <- 1
# Set Sum_Ranks to NA if there are not enough observations
density_df$Mean_PE[i] <- NA
density_df$Mean_Control[i] <- NA
}
}
# Apply Benjamini-Hochberg correction for multiple testing
density_df$Adjusted_Pvalue <- p.adjust(density_df$Wilcox_Pvalue, method = "BH")
# Determine which condition is more abundant in each interval
density_df$More_Abundant <- ifelse(density_df$Density_Control > density_df$Density_PE, conditions[1], conditions[2])
density_df$Difference <- density_df$Density_Control - density_df$Density_PE
# Display significant DA intervals
sig_DA <- density_df[density_df$Adjusted_Pvalue <= 0.2, ]
print(sig_DA)
print(density_df)
# Combine the two variable names with an underscore
if(nrow(sig_DA) >= 1){
new_variable_name <- paste(lineages,gest_age,sep = "_")
sig_DA$test<-new_variable_name
#DA_intervals_df<-rbind(DA_intervals_df,sig_DA)
# Rename the variable
#assign(new_variable_name, sig_DA)
write.csv(sig_DA,paste("~/Documents/ucl_postdoc/cell_trajectories/",lineages,"_",gest_age,".csv", sep = ""))}
# Plot
# Perform correlation tests
cor_expr <- cor.test(density_df$Mean_PE / density_df$Mean_Control, density_df$expr_score)
# Extract correlation and p-value
cor_expr_r <- round(cor_expr$estimate, 2) # Correlation coefficient for IL10
cor_expr_p <- signif(cor_expr$p.value, 2)
#expr_factor = 1
# Add correlation results to the plot title
# Calculate scaling factor between variables
y1_range <- range(density_df$Mean_PE - density_df$Mean_Control, na.rm = TRUE)
y2_range <- range(density_df$expr_score, na.rm = TRUE)
scaling_factor <- diff(y2_range)/diff(y1_range)
ggplot(data = density_df, aes(x = Pseudotime)) +
geom_vline(xintercept = sig_DA$Pseudotime,
linetype = "solid", color = "lightblue", size = 5) +
geom_hline(yintercept = 0, linetype = "dashed", color = "grey") +
geom_line(aes(y = Mean_PE - Mean_Control,
color = 'Proportion Cells PE - Control'), size = 1) +
geom_line(aes(y = (expr_score)/scaling_factor,
color = 'Expr'), size = 1, linetype = "dotted") +
scale_y_continuous(
name = 'Proportion Cells PE - Control',
sec.axis = sec_axis(~ . * scaling_factor, name = "Expression"),
expand = expansion(mult = c(0.1, 0.1)) # Add 10% padding
) +
scale_color_manual(values = c(
'Proportion Cells PE - Control' = 'blue',
'Expr' = 'purple'
)) +
labs(
x = "Pseudotime",
title = paste("r =", cor_expr_r, ", p =", cor_expr_p)
) +
theme_minimal() +
theme(legend.position = "top",
plot.title = element_text(size = 10)) +
guides(color = guide_legend(title = NULL))
}
genestoplot <- c("HTRA4","HTRA1","FSTL3","TMEM45A","EGLN3")
seu_placenta1$pseudotime<-seu_placenta1$Lineage1
seu_placenta1$lineages<-"Lineage1"
p1a<-Proportion_DAbundance_sum_expression_in_pseudotime(seu_placenta1, lineages = "Lineage1", gest_age = NULL,conditions = c("Control", "PE"), pseudotime_intervals = 3,tropho_stb,genestoplot,expr_factor = 1)
## No cells found for interval 3 . Skipping...
## Pseudotime n_samples_Control n_samples_PE Density_Control Density_PE Wilcox_Pvalue Mean_Control Mean_PE expr_score done Adjusted_Pvalue More_Abundant
## NA NA NA NA NA NA NA NA NA NA <NA> NA <NA>
## 6 15 11 13 514 2149 0.0021360271 0.06711436 0.178501573 2.1993268 yes 0.009256118 PE
## 7 18 11 13 78 924 0.0004091209 0.01457911 0.093995155 4.1865974 yes 0.002979325 PE
## 8 21 10 13 55 467 0.0004583576 0.01080712 0.042839312 4.5839539 yes 0.002979325 PE
## 12 33 12 11 1488 804 0.0604981907 0.12416864 0.057211526 0.9222128 yes 0.157295296 Control
## 14 39 11 8 183 103 0.0430709909 0.02443279 0.008779221 0.3641109 yes 0.139980720 Control
## Difference
## NA NA
## 6 -1635
## 7 -846
## 8 -412
## 12 684
## 14 80
## Pseudotime n_samples_Control n_samples_PE Density_Control Density_PE Wilcox_Pvalue Mean_Control Mean_PE expr_score done Adjusted_Pvalue More_Abundant
## 1 0 12 13 228 298 0.4627639706 0.08000466 0.025715500 0.0867085 yes 0.751991452 PE
## 2 3 12 13 185 618 0.6833131587 0.04793879 0.125269165 0.2688532 yes 0.807551915 PE
## 3 NA NA NA NA NA NA NA NA NA <NA> NA <NA>
## 4 9 0 2 0 2 1.0000000000 NA NA 2.2951013 yes 1.000000000 PE
## 5 12 12 13 900 2154 0.6438382008 0.10819060 0.129218095 0.9734493 yes 0.807551915 PE
## 6 15 11 13 514 2149 0.0021360271 0.06711436 0.178501573 2.1993268 yes 0.009256118 PE
## 7 18 11 13 78 924 0.0004091209 0.01457911 0.093995155 4.1865974 yes 0.002979325 PE
## 8 21 10 13 55 467 0.0004583576 0.01080712 0.042839312 4.5839539 yes 0.002979325 PE
## 9 24 12 13 1213 1441 0.6438382008 0.17306431 0.121967513 2.2257067 yes 0.807551915 PE
## 10 27 12 12 1086 940 0.8852339145 0.11221777 0.078759310 2.0126593 yes 0.959003407 Control
## 11 30 9 11 1046 740 0.1965119202 0.10510086 0.062430217 1.5477052 yes 0.364950709 Control
## 12 33 12 11 1488 804 0.0604981907 0.12416864 0.057211526 0.9222128 yes 0.157295296 Control
## 13 36 12 12 2182 1667 0.1572127533 0.16930124 0.109778096 0.7114181 yes 0.340627632 Control
## 14 39 11 8 183 103 0.0430709909 0.02443279 0.008779221 0.3641109 yes 0.139980720 Control
## Difference
## 1 -70
## 2 -433
## 3 NA
## 4 -2
## 5 -1254
## 6 -1635
## 7 -846
## 8 -412
## 9 -228
## 10 146
## 11 306
## 12 684
## 13 515
## 14 80
p2a<-Proportion_DAbundance_sum_expression_in_pseudotime(seu_placenta1, lineages = "Lineage1", conditions = c("Control", "PE"),gest_age = "Early" ,pseudotime_intervals = 3,tropho_stb,genestoplot,expr_factor = 1)
## No cells found for interval 3 . Skipping...
## Pseudotime n_samples_Control n_samples_PE Density_Control Density_PE Wilcox_Pvalue Mean_Control Mean_PE expr_score done Adjusted_Pvalue More_Abundant
## NA NA NA NA NA NA NA NA NA NA <NA> NA <NA>
## 6 15 4 7 88 888 0.01073342 0.029063160 0.213684048 2.7652258 yes 0.06976720 PE
## 7 18 4 7 11 513 0.01073342 0.005105015 0.134150261 4.6787133 yes 0.06976720 PE
## 8 21 4 7 13 137 0.01816302 0.007364155 0.044325094 4.8075377 yes 0.07870641 PE
## 14 39 5 3 80 20 0.03688843 0.035400813 0.007374697 0.3178295 yes 0.11988738 Control
## Difference
## NA NA
## 6 -800
## 7 -502
## 8 -124
## 14 60
## Pseudotime n_samples_Control n_samples_PE Density_Control Density_PE Wilcox_Pvalue Mean_Control Mean_PE expr_score done Adjusted_Pvalue More_Abundant
## 1 0 5 7 150 74 0.25562311 0.159818261 0.025416608 0.09639887 yes 0.41538755 Control
## 2 3 5 7 98 173 0.74533307 0.082733130 0.188390416 0.53818786 yes 0.96893299 PE
## 3 NA NA NA NA NA NA NA NA NA <NA> NA <NA>
## 4 9 0 2 0 2 1.00000000 NA NA 2.29510135 yes 1.00000000 PE
## 5 12 5 7 204 509 0.62611748 0.091496164 0.097974205 1.08535585 yes 0.90439192 PE
## 6 15 4 7 88 888 0.01073342 0.029063160 0.213684048 2.76522577 yes 0.06976720 PE
## 7 18 4 7 11 513 0.01073342 0.005105015 0.134150261 4.67871327 yes 0.06976720 PE
## 8 21 4 7 13 137 0.01816302 0.007364155 0.044325094 4.80753775 yes 0.07870641 PE
## 9 24 5 7 501 691 0.87099119 0.184983211 0.146129228 2.17395921 yes 1.00000000 PE
## 10 27 5 6 634 235 1.00000000 0.120461053 0.045396145 1.54996369 yes 1.00000000 Control
## 11 30 3 5 256 111 0.23303798 0.079148873 0.021416543 0.90644554 yes 0.41538755 Control
## 12 33 5 5 344 118 0.21007504 0.096996469 0.031627849 0.64428721 yes 0.41538755 Control
## 13 36 5 6 483 320 0.08283743 0.147395710 0.077685325 0.48103078 yes 0.21537731 Control
## 14 39 5 3 80 20 0.03688843 0.035400813 0.007374697 0.31782948 yes 0.11988738 Control
## Difference
## 1 76
## 2 -75
## 3 NA
## 4 -2
## 5 -305
## 6 -800
## 7 -502
## 8 -124
## 9 -190
## 10 399
## 11 145
## 12 226
## 13 163
## 14 60
p3a<-Proportion_DAbundance_sum_expression_in_pseudotime(seu_placenta1, lineages = "Lineage1", conditions = c("Control", "PE"),gest_age = "Late", pseudotime_intervals = 3,tropho_stb,genestoplot,expr_factor = 1)
## No cells found for interval 3 . Skipping...
## No cells found for interval 4 . Skipping...
## Pseudotime n_samples_Control n_samples_PE Density_Control Density_PE Wilcox_Pvalue Mean_Control Mean_PE expr_score done Adjusted_Pvalue More_Abundant
## NA NA NA NA NA NA NA NA NA NA <NA> NA <NA>
## NA.1 NA NA NA NA NA NA NA NA NA <NA> NA <NA>
## 8 21.04979 6 6 38 325 0.01306523 0.0117548 0.04059447 4.52664 yes 0.1567827 PE
## Difference
## NA NA
## NA.1 NA
## 8 -287
## Pseudotime n_samples_Control n_samples_PE Density_Control Density_PE Wilcox_Pvalue Mean_Control Mean_PE expr_score done Adjusted_Pvalue More_Abundant
## 1 0.0497923 7 6 79 230 0.83032426 0.02316665 0.026688460 0.08234964 yes 0.9058083 PE
## 2 3.0497923 7 6 86 439 0.83032426 0.02291398 0.051003454 0.13068479 yes 0.9058083 PE
## 3 NA NA NA NA NA NA NA NA NA <NA> NA <NA>
## 4 NA NA NA NA NA NA NA NA NA <NA> NA <NA>
## 5 12.0497923 7 6 705 1665 0.83032426 0.12152602 0.167894545 0.94051534 yes 0.9058083 PE
## 6 15.0497923 7 6 421 1257 0.10041249 0.08886009 0.137223242 1.89571236 yes 0.4016500 PE
## 7 18.0497923 7 6 67 400 0.03831876 0.01973499 0.045665823 3.68056458 yes 0.2299126 PE
## 8 21.0497923 6 6 38 325 0.01306523 0.01175480 0.040594473 4.52663981 yes 0.1567827 PE
## 9 24.0497923 7 6 718 758 0.72098486 0.16593457 0.094771129 2.26724496 yes 0.9058083 PE
## 10 27.0497923 7 6 457 707 0.94305667 0.10635471 0.113021014 2.36135889 yes 0.9430567 PE
## 11 30.0497923 6 6 794 626 0.81018124 0.11952059 0.095424715 1.69935966 yes 0.9058083 Control
## 12 33.0497923 7 6 1166 706 0.17473582 0.14573872 0.080810465 0.98173188 yes 0.5242075 Control
## 13 36.0497923 7 6 1678 1331 0.83032426 0.18196133 0.140155744 0.76887975 yes 0.9058083 Control
## 14 39.0497923 6 5 87 72 0.52281665 0.01316838 0.008096325 0.36973994 yes 0.9058083 Control
## Difference
## 1 -151
## 2 -353
## 3 NA
## 4 NA
## 5 -960
## 6 -836
## 7 -333
## 8 -287
## 9 -40
## 10 -250
## 11 168
## 12 460
## 13 347
## 14 15
p2a+p3a
## Warning: Removed 1 row containing missing values or values outside the scale range (`geom_vline()`).
## Warning: Removed 1 row containing missing values or values outside the scale range (`geom_line()`).
## Removed 1 row containing missing values or values outside the scale range (`geom_line()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_vline()`).
## Warning: Removed 2 rows containing missing values or values outside the scale range (`geom_line()`).
## Removed 2 rows containing missing values or values outside the scale range (`geom_line()`).
seu_placenta1
seu_placenta2<-subset(seu_placenta1, umap_1 < 5)
# Create the initial ggplot
p <- ggplot(seu_placenta2, aes(x = umap_1, y = umap_2, col = scaled_scores)) +
geom_point() +
scale_color_viridis_c(option = "C")
# Add the geom_path layer to the ggplot
curves <- slingCurves(seu_placenta, as.df = TRUE)
curves2<-subset(curves, Order > 38)
p1 <- p +
geom_path(data = curves2 %>% arrange(Order),
aes(group = Lineage), col = "green", size = 1.5,
arrow = arrow(type = "closed", length = unit(0.15, "inches"))) +
theme_classic() +
labs(col = "Lineage") +
#scale_color_brewer(palette = "Set1")+
geom_point(data = curves2 %>%
group_by(Lineage) %>%
slice(round(seq(1, n(), length.out = 7))),
aes( size = 4),col = "green") +
theme_classic() +
labs(col = "Lineage")
#scale_color_brewer(palette = "Set1")
p1
#ppp4<-ggplot(seu_placenta1, aes(x = umap_1, y = umap_2, col = scaled_scores)) +
# geom_point() +
# scale_color_viridis_c(option = "C")
#ppp4
genestoplot <- c("HTRA4","HTRA1","FSTL3","TMEM45A","EGLN3")
seu_placenta2$pseudotime<-seu_placenta2$Lineage1
seu_placenta2$lineages<-"Lineage1"
p1a<-DAbundance_sum_expression_in_pseudotime(seu_placenta2, lineages = "Lineage1", gest_age = NULL,conditions = c("Control", "PE"), pseudotime_intervals = 3,tropho_stb,genestoplot,expr_factor = 1)
## Pseudotime n_samples_Control n_samples_PE Density_Control Density_PE Wilcox_Pvalue Mean_Control Mean_PE expr_score done Adjusted_Pvalue More_Abundant
## 2 14.7469 11 13 599 2287 0.0276943716 54.454545 175.92308 2.033027 yes 0.092314572 PE
## 3 17.7469 11 13 91 1007 0.0003801491 8.272727 77.46154 4.042130 yes 0.003801491 PE
## 4 20.7469 10 13 62 515 0.0010798255 6.200000 39.61538 4.533899 yes 0.005399128 PE
## Difference
## 2 -1688
## 3 -916
## 4 -453
## Pseudotime n_samples_Control n_samples_PE Density_Control Density_PE Wilcox_Pvalue Mean_Control Mean_PE expr_score done Adjusted_Pvalue More_Abundant
## 1 11.7469 12 13 795 1886 0.4462712558 66.250000 145.07692 0.9307012 yes 0.763925505 PE
## 2 14.7469 11 13 599 2287 0.0276943716 54.454545 175.92308 2.0330270 yes 0.092314572 PE
## 3 17.7469 11 13 91 1007 0.0003801491 8.272727 77.46154 4.0421301 yes 0.003801491 PE
## 4 20.7469 10 13 62 515 0.0010798255 6.200000 39.61538 4.5338991 yes 0.005399128 PE
## 5 23.7469 12 13 1095 1379 0.8065268415 91.250000 106.07692 2.2523024 yes 0.896140935 PE
## 6 26.7469 12 12 1142 912 0.7505170643 95.166667 76.00000 1.9937810 yes 0.896140935 Control
## 7 29.7469 9 11 1031 796 0.4472408649 114.555556 72.36364 1.6088530 yes 0.763925505 Control
## 8 32.7469 12 11 1352 697 0.5587598229 112.666667 63.36364 0.9525701 yes 0.798228318 Control
## 9 35.7469 12 12 2268 1709 0.9080327383 189.000000 142.41667 0.7414424 yes 0.908032738 Control
## 10 38.7469 11 10 310 203 0.4583553030 28.181818 20.30000 0.3955224 yes 0.763925505 Control
## Difference
## 1 -1091
## 2 -1688
## 3 -916
## 4 -453
## 5 -284
## 6 230
## 7 235
## 8 655
## 9 559
## 10 107
p2a<-DAbundance_sum_expression_in_pseudotime(seu_placenta2, lineages = "Lineage1", conditions = c("Control", "PE"),gest_age = "Early" ,pseudotime_intervals = 3,tropho_stb,genestoplot,expr_factor = 1)
## Pseudotime n_samples_Control n_samples_PE Density_Control Density_PE Wilcox_Pvalue Mean_Control Mean_PE expr_score done Adjusted_Pvalue More_Abundant
## 3 17.7469 4 7 12 555 0.01358654 3.0 79.28571 4.603160 yes 0.1358654 PE
## 4 20.7469 4 7 14 155 0.03547995 3.5 22.14286 4.718515 yes 0.1773997 PE
## Difference
## 3 -543
## 4 -141
## Pseudotime n_samples_Control n_samples_PE Density_Control Density_PE Wilcox_Pvalue Mean_Control Mean_PE expr_score done Adjusted_Pvalue More_Abundant
## 1 11.7469 5 7 187 442 0.87099119 37.40000 63.14286 0.9911097 yes 0.9677680 PE
## 2 14.7469 4 7 103 897 0.15637572 25.75000 128.14286 2.6192571 yes 0.3909393 PE
## 3 17.7469 4 7 12 555 0.01358654 3.00000 79.28571 4.6031599 yes 0.1358654 PE
## 4 20.7469 4 7 14 155 0.03547995 3.50000 22.14286 4.7185146 yes 0.1773997 PE
## 5 23.7469 5 7 434 672 1.00000000 86.80000 96.00000 2.2336472 yes 1.0000000 PE
## 6 26.7469 5 6 678 240 0.78225207 135.60000 40.00000 1.5439314 yes 0.9677680 Control
## 7 29.7469 3 5 259 119 0.54859547 86.33333 23.80000 0.9766815 yes 0.7837078 Control
## 8 32.7469 5 5 326 95 0.40339530 65.20000 19.00000 0.5942921 yes 0.6837465 Control
## 9 35.7469 5 6 478 337 0.41024790 95.60000 56.16667 0.5388545 yes 0.6837465 Control
## 10 38.7469 5 4 123 32 0.11134689 24.60000 8.00000 0.2604051 yes 0.3711563 Control
## Difference
## 1 -255
## 2 -794
## 3 -543
## 4 -141
## 5 -238
## 6 438
## 7 140
## 8 231
## 9 141
## 10 91
p3a<-DAbundance_sum_expression_in_pseudotime(seu_placenta2, lineages = "Lineage1", conditions = c("Control", "PE"),gest_age = "Late", pseudotime_intervals = 3,tropho_stb,genestoplot,expr_factor = 1)
## Pseudotime n_samples_Control n_samples_PE Density_Control Density_PE Wilcox_Pvalue Mean_Control Mean_PE expr_score done Adjusted_Pvalue More_Abundant
## 2 15.88236 7 6 277 1009 0.012299213 39.571429 168.16667 2.36611 yes 0.05534646 PE
## 3 18.88236 7 6 50 383 0.006273708 7.142857 63.83333 4.03582 yes 0.05534646 PE
## Difference
## 2 -732
## 3 -333
## Pseudotime n_samples_Control n_samples_PE Density_Control Density_PE Wilcox_Pvalue Mean_Control Mean_PE expr_score done Adjusted_Pvalue More_Abundant
## 1 12.88236 7 6 885 2076 0.224638639 126.428571 346.00000 1.0350638 yes 0.37866788 PE
## 2 15.88236 7 6 277 1009 0.012299213 39.571429 168.16667 2.3661098 yes 0.05534646 PE
## 3 18.88236 7 6 50 383 0.006273708 7.142857 63.83333 4.0358196 yes 0.05534646 PE
## 4 21.88236 7 6 595 815 0.252445252 85.000000 135.83333 2.6044763 yes 0.37866788 PE
## 5 24.88236 7 6 291 288 0.174735823 41.571429 48.00000 2.2077441 yes 0.37866788 Control
## 6 27.88236 7 6 538 793 0.224638639 76.857143 132.16667 2.2929353 yes 0.37866788 PE
## 7 30.88236 7 6 743 474 0.617075077 106.142857 79.00000 1.4327672 yes 0.69420946 Control
## 8 33.88236 7 6 1469 1043 0.520316801 209.857143 173.83333 0.9702918 yes 0.66897874 Control
## 9 36.88236 7 6 1283 966 0.943056671 183.285714 161.00000 0.6454614 yes 0.94305667 Control
## Difference
## 1 -1191
## 2 -732
## 3 -333
## 4 -220
## 5 3
## 6 -255
## 7 269
## 8 426
## 9 317
p1a+p2a+p3a
p2a+p3a
genestoplot <- c("HTRA4","HTRA1","FSTL3","TMEM45A","EGLN3")
seu_placenta1$pseudotime<-seu_placenta1$Lineage1
seu_placenta1$lineages<-"Lineage1"
#p1a<-Proportion_DAbundance_sum_expression_in_pseudotime(seu_placenta2, lineages = "Lineage1", gest_age = NULL,conditions = c("Control", "PE"), pseudotime_intervals = 3,tropho_stb,genestoplot,expr_factor = 0.1)
p2a<-Proportion_DAbundance_sum_expression_in_pseudotime(seu_placenta2, lineages = "Lineage1", conditions = c("Control", "PE"),gest_age = "Early" ,pseudotime_intervals = 3,tropho_stb,genestoplot,expr_factor = 1)
## Pseudotime n_samples_Control n_samples_PE Density_Control Density_PE Wilcox_Pvalue Mean_Control Mean_PE expr_score done Adjusted_Pvalue More_Abundant
## 2 14.7469 4 7 103 897 0.01073342 0.040785302 0.277044432 2.6192571 yes 0.05366708 PE
## 3 17.7469 4 7 12 555 0.01073342 0.006799318 0.192204471 4.6031599 yes 0.05366708 PE
## 4 20.7469 4 7 14 155 0.02975807 0.012883839 0.083213953 4.7185146 yes 0.07342771 PE
## 8 32.7469 5 5 326 95 0.03671386 0.102247716 0.024603369 0.5942921 yes 0.07342771 Control
## 9 35.7469 5 6 478 337 0.05523425 0.200614206 0.095327414 0.5388545 yes 0.09205709 Control
## 10 38.7469 5 4 123 32 0.01996445 0.081029512 0.008507792 0.2604051 yes 0.06654818 Control
## Difference
## 2 -794
## 3 -543
## 4 -141
## 8 231
## 9 141
## 10 91
## Pseudotime n_samples_Control n_samples_PE Density_Control Density_PE Wilcox_Pvalue Mean_Control Mean_PE expr_score done Adjusted_Pvalue More_Abundant
## 1 11.7469 5 7 187 442 1.00000000 0.142330078 0.110557671 0.9911097 yes 1.00000000 PE
## 2 14.7469 4 7 103 897 0.01073342 0.040785302 0.277044432 2.6192571 yes 0.05366708 PE
## 3 17.7469 4 7 12 555 0.01073342 0.006799318 0.192204471 4.6031599 yes 0.05366708 PE
## 4 20.7469 4 7 14 155 0.02975807 0.012883839 0.083213953 4.7185146 yes 0.07342771 PE
## 5 23.7469 5 7 434 672 0.87099119 0.239458971 0.167855265 2.2336472 yes 1.00000000 PE
## 6 26.7469 5 6 678 240 0.92726447 0.137127312 0.055494053 1.5439314 yes 1.00000000 Control
## 7 29.7469 3 5 259 119 0.55098499 0.081362396 0.024378527 0.9766815 yes 0.78712141 Control
## 8 32.7469 5 5 326 95 0.03671386 0.102247716 0.024603369 0.5942921 yes 0.07342771 Control
## 9 35.7469 5 6 478 337 0.05523425 0.200614206 0.095327414 0.5388545 yes 0.09205709 Control
## 10 38.7469 5 4 123 32 0.01996445 0.081029512 0.008507792 0.2604051 yes 0.06654818 Control
## Difference
## 1 -255
## 2 -794
## 3 -543
## 4 -141
## 5 -238
## 6 438
## 7 140
## 8 231
## 9 141
## 10 91
p3a<-Proportion_DAbundance_sum_expression_in_pseudotime(seu_placenta2, lineages = "Lineage1", conditions = c("Control", "PE"),gest_age = "Late", pseudotime_intervals = 3,tropho_stb,genestoplot,expr_factor = 1)
## Pseudotime n_samples_Control n_samples_PE Density_Control Density_PE Wilcox_Pvalue Mean_Control Mean_PE expr_score done Adjusted_Pvalue More_Abundant
## 2 15.88236 7 6 277 1009 0.03831876 0.07238716 0.12482628 2.36611 yes 0.1724344 PE
## 3 18.88236 7 6 50 383 0.02680913 0.01662058 0.04841309 4.03582 yes 0.1724344 PE
## Difference
## 2 -732
## 3 -333
## Pseudotime n_samples_Control n_samples_PE Density_Control Density_PE Wilcox_Pvalue Mean_Control Mean_PE expr_score done Adjusted_Pvalue More_Abundant
## 1 12.88236 7 6 885 2076 0.72098486 0.16179616 0.24251174 1.0350638 yes 0.8111080 PE
## 2 15.88236 7 6 277 1009 0.03831876 0.07238716 0.12482628 2.3661098 yes 0.1724344 PE
## 3 18.88236 7 6 50 383 0.02680913 0.01662058 0.04841309 4.0358196 yes 0.1724344 PE
## 4 21.88236 7 6 595 815 0.83032426 0.13813626 0.10623763 2.6044763 yes 0.8303243 PE
## 5 24.88236 7 6 291 288 0.72098486 0.08711023 0.03922031 2.2077441 yes 0.8111080 Control
## 6 27.88236 7 6 538 793 0.72098486 0.09819989 0.14112687 2.2929353 yes 0.8111080 PE
## 7 30.88236 7 6 743 474 0.52031680 0.10032333 0.07009519 1.4327672 yes 0.8111080 Control
## 8 33.88236 7 6 1469 1043 0.35311123 0.17707654 0.12417391 0.9702918 yes 0.8111080 Control
## 9 36.88236 7 6 1283 966 0.43203489 0.14834985 0.10339498 0.6454614 yes 0.8111080 Control
## Difference
## 1 -1191
## 2 -732
## 3 -333
## 4 -220
## 5 3
## 6 -255
## 7 269
## 8 426
## 9 317
#p1a+p2a+p3a
p2a+p3a